from pymongo import MongoClient

import jieba.posseg as pseg
import jieba
import sys

# Load the custom vocabulary: one term per line, normalised to lower case
# (token() lower-cases the text it segments, so the two stay comparable).
# NOTE(review): the path is relative to the current working directory and the
# file is decoded with the platform default encoding -- confirm both are intended.
with open('../core.txt', 'r') as core_file:
    stripped_lines = (line.strip() for line in core_file)
    # Skip blank lines so that no empty "word" gets registered with jieba.
    techwords = [w.lower() for w in stripped_lines if w]

# Register every term with jieba using frequency 10000 and tag 'n'.
# Done after the file has been closed; the handle is not needed here.
for w in techwords:
    jieba.add_word(w, 10000, 'n')

# Client for the MongoDB server on localhost:27017. connect=False is passed
# through to MongoClient (per PyMongo, this defers connecting until the first
# operation).
client = MongoClient(host='127.0.0.1', port=27017, connect=False)

# Handles for the resume database and the collection this script rewrites.
qushu_resume_db = client['qushu_resume_db']
qushu_resume_coll = qushu_resume_db['qushu_resume_coll']


def token(desc):
    """Return the distinct tokens of ``desc`` that are longer than one character.

    The text is lower-cased and segmented with ``pseg.cut``; the
    part-of-speech flag of each (word, flag) pair is ignored.

    Args:
        desc: Text to segment.

    Returns:
        A sorted list of the unique tokens whose length is greater than 1.
    """
    unique_words = {
        word for word, _flag in pseg.cut(desc.lower()) if len(word) > 1
    }
    # Sort so the result is deterministic: iterating a bare set of strings can
    # yield a different order on every interpreter run, which would make
    # re-running the job rewrite documents with a shuffled token list.
    return sorted(unique_words)


def _resume_text(resume):
    """Join a resume's free-text fields into one '|'-separated string.

    The result is ``self_introduce`` followed by the ``describe`` value of
    every ``work_experience`` entry and then every ``proj_experience`` entry.
    Fields that are missing or ``None`` contribute an empty string instead of
    raising, so one incomplete document cannot abort the whole run.
    """
    parts = [resume.get('self_introduce') or '']
    for section in ('work_experience', 'proj_experience'):
        for entry in resume.get(section) or ():
            parts.append(entry.get('describe') or '')
    return '|'.join(parts)


# Number of documents processed so far / number expected (for progress only).
n = 0
try:
    # Collection.count() is deprecated and was removed in PyMongo 4.
    total = qushu_resume_coll.count_documents({})
except TypeError:
    # Older PyMongo without count_documents: the attribute lookup yields a
    # sub-collection object, and calling that raises TypeError.
    total = qushu_resume_coll.count()

# A cursor opened with no_cursor_timeout=True is not reaped by the server on
# idle, so it is closed explicitly even when the loop raises.
cursor = qushu_resume_coll.find({}, no_cursor_timeout=True)
try:
    for resume in cursor:
        # Store the token list of the combined text on the same document.
        qushu_resume_coll.update_one(
            {'_id': resume['_id']},
            {'$set': {'token': token(_resume_text(resume))}},
        )

        n += 1

        # max() keeps the denominator non-zero and the figure at or below 100
        # if documents were inserted after the count was taken.
        percent = n * 100.0 / max(total, n)
        # "\r" returns to the start of the line so the next value overwrites it.
        sys.stdout.write("%.2f\r" % percent)
        sys.stdout.flush()
finally:
    cursor.close()
